{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: evaluate in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.4.1)\n",
      "Requirement already satisfied: datasets>=2.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (2.15.0)\n",
      "Requirement already satisfied: numpy>=1.17 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (1.26.4)\n",
      "Requirement already satisfied: dill in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (0.3.7)\n",
      "Requirement already satisfied: pandas in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (2.1.3)\n",
      "Requirement already satisfied: requests>=2.19.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (2.31.0)\n",
      "Requirement already satisfied: tqdm>=4.62.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (4.66.1)\n",
      "Requirement already satisfied: xxhash in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (3.4.1)\n",
      "Requirement already satisfied: multiprocess in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (0.70.15)\n",
      "Requirement already satisfied: fsspec[http]>=2021.05.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (2023.10.0)\n",
      "Requirement already satisfied: huggingface-hub>=0.7.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (0.19.4)\n",
      "Requirement already satisfied: packaging in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from evaluate) (23.2)\n",
      "Requirement already satisfied: responses<0.19 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from evaluate) (0.18.0)\n",
      "Requirement already satisfied: pyarrow>=8.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from datasets>=2.0.0->evaluate) (14.0.1)\n",
      "Requirement already satisfied: pyarrow-hotfix in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from datasets>=2.0.0->evaluate) (0.5)\n",
      "Requirement already satisfied: aiohttp in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from datasets>=2.0.0->evaluate) (3.9.0)\n",
      "Requirement already satisfied: pyyaml>=5.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from datasets>=2.0.0->evaluate) (6.0.1)\n",
      "Requirement already satisfied: filelock in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from huggingface-hub>=0.7.0->evaluate) (3.13.1)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from huggingface-hub>=0.7.0->evaluate) (4.8.0)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.19.0->evaluate) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.19.0->evaluate) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.19.0->evaluate) (2.1.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests>=2.19.0->evaluate) (2023.11.17)\n",
      "Requirement already satisfied: colorama in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from tqdm>=4.62.1->evaluate) (0.4.6)\n",
      "Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from pandas->evaluate) (2.8.2)\n",
      "Requirement already satisfied: pytz>=2020.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas->evaluate) (2023.3.post1)\n",
      "Requirement already satisfied: tzdata>=2022.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pandas->evaluate) (2023.3)\n",
      "Requirement already satisfied: attrs>=17.3.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp->datasets>=2.0.0->evaluate) (23.1.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n",
      "Requirement already satisfied: yarl<2.0,>=1.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.9.3)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.4.0)\n",
      "Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from python-dateutil>=2.8.2->pandas->evaluate) (1.16.0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip available: 22.3 -> 24.0\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: TensorFlow in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (2.15.0)\n",
      "Requirement already satisfied: tensorflow-intel==2.15.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from TensorFlow) (2.15.0)\n",
      "Requirement already satisfied: absl-py>=1.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (2.0.0)\n",
      "Requirement already satisfied: astunparse>=1.6.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (1.6.3)\n",
      "Requirement already satisfied: flatbuffers>=23.5.26 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (23.5.26)\n",
      "Requirement already satisfied: gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (0.5.4)\n",
      "Requirement already satisfied: google-pasta>=0.1.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (0.2.0)\n",
      "Requirement already satisfied: h5py>=2.9.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (3.10.0)\n",
      "Requirement already satisfied: libclang>=13.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (16.0.6)\n",
      "Requirement already satisfied: ml-dtypes~=0.2.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (0.2.0)\n",
      "Requirement already satisfied: numpy<2.0.0,>=1.23.5 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (1.26.4)\n",
      "Requirement already satisfied: opt-einsum>=2.3.2 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (3.3.0)\n",
      "Requirement already satisfied: packaging in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (23.2)\n",
      "Requirement already satisfied: protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (4.23.4)\n",
      "Requirement already satisfied: setuptools in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (65.5.0)\n",
      "Requirement already satisfied: six>=1.12.0 in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (1.16.0)\n",
      "Requirement already satisfied: termcolor>=1.1.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (2.3.0)\n",
      "Requirement already satisfied: typing-extensions>=3.6.6 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (4.8.0)\n",
      "Requirement already satisfied: wrapt<1.15,>=1.11.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (1.14.1)\n",
      "Requirement already satisfied: tensorflow-io-gcs-filesystem>=0.23.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (0.31.0)\n",
      "Requirement already satisfied: grpcio<2.0,>=1.24.3 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (1.59.3)\n",
      "Requirement already satisfied: tensorboard<2.16,>=2.15 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (2.15.1)\n",
      "Requirement already satisfied: tensorflow-estimator<2.16,>=2.15.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (2.15.0)\n",
      "Requirement already satisfied: keras<2.16,>=2.15.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorflow-intel==2.15.0->TensorFlow) (2.15.0)\n",
      "Requirement already satisfied: wheel<1.0,>=0.23.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from astunparse>=1.6.0->tensorflow-intel==2.15.0->TensorFlow) (0.41.3)\n",
      "Requirement already satisfied: google-auth<3,>=1.6.3 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (2.23.4)\n",
      "Requirement already satisfied: google-auth-oauthlib<2,>=0.5 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (1.1.0)\n",
      "Requirement already satisfied: markdown>=2.6.8 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (3.5.1)\n",
      "Requirement already satisfied: requests<3,>=2.21.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (2.31.0)\n",
      "Requirement already satisfied: tensorboard-data-server<0.8.0,>=0.7.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (0.7.2)\n",
      "Requirement already satisfied: werkzeug>=1.0.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (3.0.1)\n",
      "Requirement already satisfied: cachetools<6.0,>=2.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (5.3.2)\n",
      "Requirement already satisfied: pyasn1-modules>=0.2.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (0.3.0)\n",
      "Requirement already satisfied: rsa<5,>=3.1.4 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (4.9)\n",
      "Requirement already satisfied: requests-oauthlib>=0.7.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (1.3.1)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (2.1.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (2023.11.17)\n",
      "Requirement already satisfied: MarkupSafe>=2.1.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from werkzeug>=1.0.1->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (2.1.3)\n",
      "Requirement already satisfied: pyasn1<0.6.0,>=0.4.6 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (0.5.1)\n",
      "Requirement already satisfied: oauthlib>=3.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<2,>=0.5->tensorboard<2.16,>=2.15->tensorflow-intel==2.15.0->TensorFlow) (3.2.2)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip available: 22.3 -> 24.0\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: accelerate in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (0.27.0)\n",
      "Requirement already satisfied: numpy>=1.17 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from accelerate) (1.26.4)\n",
      "Requirement already satisfied: packaging>=20.0 in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from accelerate) (23.2)\n",
      "Requirement already satisfied: psutil in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from accelerate) (5.9.6)\n",
      "Requirement already satisfied: pyyaml in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from accelerate) (6.0.1)\n",
      "Requirement already satisfied: torch>=1.10.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from accelerate) (2.1.1+cu118)\n",
      "Requirement already satisfied: huggingface-hub in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from accelerate) (0.19.4)\n",
      "Requirement already satisfied: safetensors>=0.3.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from accelerate) (0.4.0)\n",
      "Requirement already satisfied: filelock in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from torch>=1.10.0->accelerate) (3.13.1)\n",
      "Requirement already satisfied: typing-extensions in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from torch>=1.10.0->accelerate) (4.8.0)\n",
      "Requirement already satisfied: sympy in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from torch>=1.10.0->accelerate) (1.12)\n",
      "Requirement already satisfied: networkx in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from torch>=1.10.0->accelerate) (3.2.1)\n",
      "Requirement already satisfied: jinja2 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from torch>=1.10.0->accelerate) (3.1.2)\n",
      "Requirement already satisfied: fsspec in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from torch>=1.10.0->accelerate) (2023.10.0)\n",
      "Requirement already satisfied: requests in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from huggingface-hub->accelerate) (2.31.0)\n",
      "Requirement already satisfied: tqdm>=4.42.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from huggingface-hub->accelerate) (4.66.1)\n",
      "Requirement already satisfied: colorama in c:\\users\\jihed\\appdata\\roaming\\python\\python311\\site-packages (from tqdm>=4.42.1->huggingface-hub->accelerate) (0.4.6)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.3)\n",
      "Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n",
      "Requirement already satisfied: idna<4,>=2.5 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->huggingface-hub->accelerate) (3.4)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->huggingface-hub->accelerate) (2.1.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from requests->huggingface-hub->accelerate) (2023.11.17)\n",
      "Requirement already satisfied: mpmath>=0.19 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip available: 22.3 -> 24.0\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: scikit-learn in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (1.3.2)\n",
      "Requirement already satisfied: numpy<2.0,>=1.17.3 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from scikit-learn) (1.26.4)\n",
      "Requirement already satisfied: scipy>=1.5.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from scikit-learn) (1.11.4)\n",
      "Requirement already satisfied: joblib>=1.1.1 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from scikit-learn) (1.3.2)\n",
      "Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\jihed\\appdata\\local\\programs\\python\\python311\\lib\\site-packages (from scikit-learn) (3.2.0)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "[notice] A new release of pip available: 22.3 -> 24.0\n",
      "[notice] To update, run: python.exe -m pip install --upgrade pip\n"
     ]
    }
   ],
   "source": [
    "#!pip install evaluate\n",
    "#!pip install TensorFlow\n",
    "#!pip install -U accelerate\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import pyreadr\n",
    "from datasets import Dataset, load_dataset\n",
    "from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, pipeline\n",
    "from transformers.pipelines.pt_utils import KeyDataset\n",
    "import torch\n",
    "import evaluate\n",
    "import accelerate\n",
    "\n",
    "#!pip install scikit-learn\n",
    "import sklearn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_parquet(\"./train.parquet\")\n",
    "eval = pd.read_parquet(\"./eval.parquet\")\n",
    "test = pd.read_parquet(\"./test.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = train[['text', 'housing_coded']]\n",
    "eval = eval[['text', 'housing_coded']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "id2label = {\n",
    "    0: \"Non-housing\",\n",
    "    1: \"Housing\",\n",
    "}\n",
    "label2id = {\n",
    "    \"Non-housing\": 0,\n",
    "    \"Housing\": 1,\n",
    "}\n",
    "\n",
    "train['labels'] = train['housing_coded'].map(id2label)\n",
    "train['labels'] = train['labels'].map(label2id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "id2label = {\n",
    "    0: \"Non-housing\",\n",
    "    1: \"Housing\",\n",
    "}\n",
    "label2id = {\n",
    "    \"Non-housing\": 0,\n",
    "    \"Housing\": 1,\n",
    "}\n",
    "\n",
    "eval['labels'] = eval['housing_coded'].map(id2label)\n",
    "eval['labels'] = eval['labels'].map(label2id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = Dataset.from_pandas(train)\n",
    "eval = Dataset.from_pandas(eval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.weight', 'classifier.bias', 'pre_classifier.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "model_name = \"distilbert-base-uncased\"\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "model = AutoModelForSequenceClassification.from_pretrained(model_name,\n",
    "                                                           num_labels = 2,\n",
    "                                                           id2label=id2label,\n",
    "                                                           label2id=label2id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "batch_size = 16\n",
    "\n",
    "logging_steps = len(train) // batch_size\n",
    "\n",
    "model_name = f\"{model_name}-finetuned-housing\"\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=model_name,\n",
    "    num_train_epochs=4,\n",
    "    learning_rate=2e-5,\n",
    "    per_device_train_batch_size=batch_size,\n",
    "    per_device_eval_batch_size=batch_size,\n",
    "    weight_decay=0.01,\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    disable_tqdm=False,\n",
    "    logging_steps=logging_steps,\n",
    "    push_to_hub=False,\n",
    "    log_level=\"error\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_metrics(eval_pred):\n",
    "    f1 = evaluate.load(\"f1\")\n",
    "    accuracy = evaluate.load(\"accuracy\")\n",
    "\n",
    "    logits, labels = eval_pred\n",
    "    predictions = np.argmax(logits, axis=-1)\n",
    "\n",
    "    f1val = f1.compute(references=labels, predictions=predictions, average = \"weighted\")\n",
    "    accval = accuracy.compute(references=labels, predictions=predictions)\n",
    "    return {\"F1\": f1val, \"accuracy\": accval}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Map: 100%|██████████| 600/600 [00:00<00:00, 5112.77 examples/s]\n",
      "Map: 100%|██████████| 200/200 [00:00<00:00, 4685.80 examples/s]\n"
     ]
    }
   ],
   "source": [
    "def tokenize(batch):\n",
    "    return tokenizer(batch[\"text\"], truncation=True, padding=True)\n",
    "\n",
    "train = train.map(tokenize, batched=True)\n",
    "eval = eval.map(tokenize, batched=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 25%|██▌       | 38/152 [00:20<00:37,  3.00it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.5614, 'learning_rate': 1.5131578947368422e-05, 'epoch': 0.97}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                \n",
      " 25%|██▌       | 38/152 [00:26<00:37,  3.00it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.3055301606655121, 'eval_F1': {'f1': 0.9058000000000002}, 'eval_accuracy': {'accuracy': 0.91}, 'eval_runtime': 6.0284, 'eval_samples_per_second': 33.176, 'eval_steps_per_second': 2.156, 'epoch': 1.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 49%|████▊     | 74/152 [00:40<00:29,  2.61it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.256, 'learning_rate': 1.0263157894736844e-05, 'epoch': 1.95}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                \n",
      " 50%|█████     | 76/152 [00:45<00:25,  3.01it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.18077197670936584, 'eval_F1': {'f1': 0.9405914172376487}, 'eval_accuracy': {'accuracy': 0.94}, 'eval_runtime': 4.9202, 'eval_samples_per_second': 40.648, 'eval_steps_per_second': 2.642, 'epoch': 2.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 73%|███████▎  | 111/152 [00:59<00:15,  2.61it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.177, 'learning_rate': 5.394736842105264e-06, 'epoch': 2.92}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                 \n",
      " 75%|███████▌  | 114/152 [01:05<00:12,  3.03it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.19299931824207306, 'eval_F1': {'f1': 0.9357872544035409}, 'eval_accuracy': {'accuracy': 0.935}, 'eval_runtime': 5.4996, 'eval_samples_per_second': 36.366, 'eval_steps_per_second': 2.364, 'epoch': 3.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 97%|█████████▋| 148/152 [01:19<00:01,  2.61it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'loss': 0.1211, 'learning_rate': 5.263157894736843e-07, 'epoch': 3.89}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                 \n",
      "100%|██████████| 152/152 [01:27<00:00,  1.73it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'eval_loss': 0.17617520689964294, 'eval_F1': {'f1': 0.9502549781447306}, 'eval_accuracy': {'accuracy': 0.95}, 'eval_runtime': 7.2209, 'eval_samples_per_second': 27.697, 'eval_steps_per_second': 1.8, 'epoch': 4.0}\n",
      "{'train_runtime': 87.6749, 'train_samples_per_second': 27.374, 'train_steps_per_second': 1.734, 'train_loss': 0.27329943054600764, 'epoch': 4.0}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "TrainOutput(global_step=152, training_loss=0.27329943054600764, metrics={'train_runtime': 87.6749, 'train_samples_per_second': 27.374, 'train_steps_per_second': 1.734, 'train_loss': 0.27329943054600764, 'epoch': 4.0})"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    compute_metrics=compute_metrics,\n",
    "    train_dataset=train,\n",
    "    eval_dataset=eval,\n",
    "    tokenizer=tokenizer\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "trainer.save_model(\"distilbert-base-uncased-finetuned-housing\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "model_ckpt = \"distilbert-base-uncased-finetuned-housing\"\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_ckpt)\n",
    "\n",
    "num_labels = 2\n",
    "id2label = {\n",
    "    0: \"Non-housing\",\n",
    "    1: \"Housing\",\n",
    "}\n",
    "label2id = {\n",
    "    \"Non-housing\": 0,\n",
    "    \"Housing\": 1,\n",
    "}\n",
    "\n",
    "model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels, id2label=id2label, label2id=label2id)\n",
    "classify = pipeline(task='text-classification',  # replace with whatever task you have\n",
    "                    model=model,\n",
    "                    tokenizer = tokenizer,\n",
    "                    top_k = None)\n",
    "batch_size = 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "test = test[['text']]\n",
    "\n",
    "test = test.reset_index(drop=True)\n",
    "\n",
    "dataset = Dataset.from_pandas(test)\n",
    "\n",
    "def list_of_lists2pd(l):\n",
    "    inner_list = l\n",
    "    labels = [item['label'] for item in inner_list]\n",
    "    scores = [item['score'] for item in inner_list]\n",
    "    # Create a DataFrame from the extracted data\n",
    "    df = pd.DataFrame([scores], columns=labels)\n",
    "    return(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = [list_of_lists2pd(out) for out in classify(KeyDataset(dataset, \"text\"), batch_size = batch_size)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = pd.concat(res)\n",
    "test = test.reset_index(drop = True)\n",
    "res = res.reset_index(drop = True)\n",
    "out = pd.concat([test, res], axis = 1)\n",
    "out = out.reset_index(drop = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "out.to_parquet(\"test_distilbert.parquet\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
